library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
J. Pennington et al., GloVe: Global Vectors for Word Representation, 2014. * Word Representation 방법 중 하나
유사한 업종은 어떤 것이 있는지 알아보자
버닝썬 법인 등기부 등본 > https://monthly.chosun.com/client/mdaily/daily_view.asp?Idx=6514&Newsnumb=2019036514
통계청 표준산업분류 > https://kssc.kostat.go.kr:8443/ksscNew_web/kssc/ccc/forwardPage.do?gubun=001_1#
어떤 작업을 해야 하나?
의미와 문법 등에 대한 정보를 잘 인식함
Full Document 기반
Window 기반
# Vocabulary of the toy corpus; entry i names row i and column i of coMatrix.
labels <- c("I", "like", "enjoy", "deep", "learning", "NLP", "flying", ".")

# Symmetric 8 x 8 word co-occurrence counts, written one row per word.
# NOTE(review): the counts look like the window-size-1 example built from
# "I like deep learning. I like NLP. I enjoy flying." -- confirm the corpus.
coMatrix <- rbind(
  c(0, 2, 1, 0, 0, 0, 0, 0), # I
  c(2, 0, 0, 1, 0, 1, 0, 0), # like
  c(1, 0, 0, 0, 0, 0, 1, 0), # enjoy
  c(0, 1, 0, 0, 1, 0, 0, 0), # deep
  c(0, 0, 0, 1, 0, 0, 0, 1), # learning
  c(0, 1, 0, 0, 0, 0, 0, 1), # NLP
  c(0, 0, 1, 0, 0, 0, 0, 1), # flying
  c(0, 0, 0, 0, 1, 1, 1, 0)  # .
)
# Singular value decomposition of the co-occurrence matrix. nu = 2 and nv = 2
# request only the first two left and right singular vectors, which gives a
# two-dimensional representation of each word.
singular_value <- svd(x = coMatrix, nu = 2, nv = 2)

# One row per word: its coordinates along the first two left singular
# vectors (f1, f2) together with the word itself (nm).
nm <- labels
f1 <- singular_value[["u"]][, 1]
f2 <- singular_value[["u"]][, 2]
df <- tibble(f1 = f1, f2 = f2, nm = nm)
# Scatter plot of the words using the f1 and f2 columns of `df` as coordinates.
# The label text is mapped inside aes(), so it is read from the `nm` column of
# `df` row by row alongside f1/f2 instead of from a free-standing global
# vector that could fall out of sync with the data (e.g. after reordering or
# filtering `df`).
ggplot(df, aes(x = f1, y = f2)) +
  geom_point() +
  geom_label(aes(label = nm), nudge_x = 0, nudge_y = 0)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Heatmap of the raw co-occurrence counts. The vocabulary is supplied on both
# axes (instead of the default numeric indices) so this plot can be compared
# cell by cell with the heatmap stored in p2, which labels its axes the same
# way.
p1 <- plot_ly(
  x = labels,
  y = labels,
  z = coMatrix,
  type = "heatmap"
)
p1
# Low-rank reconstruction of the co-occurrence matrix from the truncated SVD:
#   u : left singular vectors returned by svd()
#   d : diagonal matrix holding the two leading singular values
#   v : transpose of the right singular vectors returned by svd()
u <- singular_value[["u"]]
d <- diag(singular_value[["d"]][seq_len(2)])
v <- t(singular_value[["v"]])
coMatrix_hat <- u %*% d %*% v
# Heatmap of the reconstructed matrix, with the vocabulary on both axes.
p2 <- plot_ly(x = labels, y = labels, z = coMatrix_hat, type = "heatmap")
p2
* $P_{ik}$의 정의: $P_{ik} = P(k \mid i) = X_{ik} / X_i$, 즉 단어 i와 단어 k가 동시에 나타난 횟수($X_{ik}$)를 단어 i가 나타난 횟수($X_i$)로 나눈 값임